Doing Zone-based visualizations
plot_zone_per_pitchtype_RHP_rev <- function(model, data,
pitch_cluster,
pitch_type = NULL,
title = NULL) {
# Strike zone bounds
k_zone_height_max <- 3.67
k_zone_height_min <- 1.52
sides <- c(-0.83, 0.83)
# Define zone width/height and breakpoints
zone_width <- diff(sides)
zone_height <- k_zone_height_max - k_zone_height_min
x_breaks <- seq(sides[1] - zone_width/3,
sides[2] + zone_width/3,
length.out = 6)
y_breaks <- seq(k_zone_height_min - zone_height/3,
k_zone_height_max + zone_height/3,
length.out = 6)
# Cluster means for fixed covariates
cluster_summary_rr <- data %>%
filter(Cluster == pitch_cluster) %>%
summarise(across(c(spinrate, relspeed, inducedvertbreak,
horzbreak, initposx, initposz), ~mean(.x, na.rm=TRUE)))
# Prediction grid
grid <- expand.grid(
platelocside = seq(sides[1] - 0.5, sides[2] + 0.5, length.out = 300),
platelocheight = seq(k_zone_height_min - 0.5, k_zone_height_max + 0.5, length.out = 300)
)
for (col in names(cluster_summary_rr)) grid[[col]] <- cluster_summary_rr[[col]]
grid$Cluster <- pitch_cluster
# Predict probabilities
grid$pred_prob <- predict(model, newdata = grid, type = "response")
# Average within bins and compute boundaries
zone_summary <- grid %>%
mutate(
x_bin = cut(platelocside, breaks = x_breaks, include.lowest = TRUE),
y_bin = cut(platelocheight, breaks = y_breaks, include.lowest = TRUE)
) %>%
group_by(x_bin, y_bin) %>%
summarise(
mean_prob = mean(pred_prob, na.rm = TRUE),
x_min = min(platelocside),
x_max = max(platelocside),
y_min = min(platelocheight),
y_max = max(platelocheight),
.groups = "drop"
) %>%
mutate(
x_center = (x_min + x_max)/2,
y_center = (y_min + y_max)/2
)
# ---- Plot using real coordinates (rectangles) ----
p <- ggplot(zone_summary) +
geom_rect(
aes(xmin = x_min, xmax = x_max,
ymin = y_min, ymax = y_max,
fill = mean_prob),
color = "white", linewidth = 0.8
) +
geom_text(aes(x = x_center, y = y_center,
label = sprintf("%.2f", mean_prob)),
color = "white", size = 3.8) +
# True strike-zone outline
geom_rect(aes(xmin = sides[1], xmax = sides[2],
ymin = k_zone_height_min, ymax = k_zone_height_max),
color = "black", fill = NA, linewidth = 1.3) +
scale_fill_viridis_c(option = "C", direction = -1, limits = c(0, 0.5)) +
coord_equal() +
labs(
title = paste0(title, " — Pitch Profile: ", pitch_type,
" (Cluster ", pitch_cluster, ")"),
x = "Horizontal Location (ft, Catcher View)",
y = "Vertical Location (ft)",
fill = "Pred. Prob."
) +
theme_minimal(base_size = 14) +
theme(
panel.grid = element_blank(),
plot.title = element_text(size = 13, hjust = 0.5)
)
return(p)
}
plot_zone_per_pitchtype_RHP_rev(GAM_RR, RHP_RHH_by_cluster, 0,
"Slider/Cutter","RHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RR, RHP_RHH_by_cluster, 1,
"4-Seam Fastball","RHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RR, RHP_RHH_by_cluster, 2,
"Splitter/Changeup","RHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RR, RHP_RHH_by_cluster, 3,
"Sinker/2-Seamer","RHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RR, RHP_RHH_by_cluster, 4,
"Curveball/Vertical Dropper","RHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RL, RHP_LHH_by_cluster, 0,
"Slider/Cutter","RHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RL, RHP_LHH_by_cluster, 1,
"4-Seam Fastball","RHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RL, RHP_LHH_by_cluster, 2,
"Splitter/Changeup","RHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RL, RHP_LHH_by_cluster, 3,
"Sinker/2-Seamer","RHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_RL, RHP_LHH_by_cluster, 4,
"Curveball/Vertical Dropper","RHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LR, LHP_RHH_by_cluster, 0,
"Curveball/Vertical Dropper", "LHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LR, LHP_RHH_by_cluster, 1,
"4-Seam Fastball", "LHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LR, LHP_RHH_by_cluster, 2,
"Slider/Cutter", "LHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LR, LHP_RHH_by_cluster, 3,
"Splitter/Changeup", "LHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LR, LHP_RHH_by_cluster, 4,
"Sinker/2-Seamer", "LHP vs RHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LL, LHP_LHH_by_cluster, 0,
"Curveball/Vertical Dropper", "LHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LL, LHP_LHH_by_cluster, 1,
"4-Seam Fastball", "LHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LL, LHP_LHH_by_cluster, 2,
"Slider/Cutter", "LHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LL, LHP_LHH_by_cluster, 3,
"Splitter/Changeup", "LHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

plot_zone_per_pitchtype_RHP_rev(GAM_LL, LHP_LHH_by_cluster, 4,
"Sinker/2-Seamer", "LHP vs LHH")
## Warning in geom_rect(aes(xmin = sides[1], xmax = sides[2], ymin = k_zone_height_min, : All aesthetics have length 1, but the data has 25 rows.
## ℹ Please consider using `annotate()` or provide this layer with data containing
## a single row.

# RR
RHP_RHH_by_cluster$Prediction <- predict(GAM_RR,
newdata=RHP_RHH_by_cluster,
type="response")
pitch_types_ranked_RR <- RHP_RHH_by_cluster %>%
group_by(Cluster) %>%
summarise(
mean_pred_prob = mean(Prediction, na.rm=TRUE),
se_pred_prob = sd(Prediction, na.rm=TRUE) / sqrt(n()),
n = n()
) %>%
arrange(desc(mean_pred_prob)) %>%
mutate(Rank = row_number())
pitch_types_ranked_RR
## # A tibble: 5 × 5
## Cluster mean_pred_prob se_pred_prob n Rank
## <dbl> <dbl> <dbl> <int> <int>
## 1 3 0.207 0.000320 34464 1
## 2 2 0.191 0.000728 8113 2
## 3 0 0.179 0.000291 38163 3
## 4 4 0.168 0.000421 22204 4
## 5 1 0.129 0.000216 41876 5
# RL
RHP_LHH_by_cluster$Prediction <- predict(GAM_RL,
newdata=RHP_LHH_by_cluster,
type="response")
pitch_types_ranked_RL <- RHP_LHH_by_cluster %>%
group_by(Cluster) %>%
summarise(
mean_pred_prob = mean(Prediction, na.rm=TRUE),
se_pred_prob = sd(Prediction, na.rm=TRUE) / sqrt(n()),
n = n()
) %>%
arrange(desc(mean_pred_prob)) %>%
mutate(Rank = row_number())
pitch_types_ranked_RL
## # A tibble: 5 × 5
## Cluster mean_pred_prob se_pred_prob n Rank
## <dbl> <dbl> <dbl> <int> <int>
## 1 2 0.188 0.000426 19571 1
## 2 3 0.180 0.000366 24419 2
## 3 4 0.159 0.000458 15520 3
## 4 0 0.138 0.000335 20988 4
## 5 1 0.120 0.000198 42944 5
# LR
LHP_RHH_by_cluster$Prediction <- predict(GAM_LR,
newdata=LHP_RHH_by_cluster,
type="response")
pitch_types_ranked_LR <- LHP_RHH_by_cluster %>%
group_by(Cluster) %>%
summarise(
mean_pred_prob = mean(Prediction, na.rm=TRUE),
se_pred_prob = sd(Prediction, na.rm=TRUE) / sqrt(n()),
n = n()
) %>%
arrange(desc(mean_pred_prob)) %>%
mutate(Rank = row_number())
pitch_types_ranked_LR
## # A tibble: 5 × 5
## Cluster mean_pred_prob se_pred_prob n Rank
## <dbl> <dbl> <dbl> <int> <int>
## 1 3 0.199 0.000484 13072 1
## 2 4 0.186 0.000385 15923 2
## 3 0 0.158 0.000544 8841 3
## 4 2 0.146 0.000381 12011 4
## 5 1 0.119 0.000234 23124 5
# LL
LHP_LHH_by_cluster$Prediction <- predict(GAM_LL,
newdata=LHP_LHH_by_cluster,
type="response")
pitch_types_ranked_LL <- LHP_LHH_by_cluster %>%
group_by(Cluster) %>%
summarise(
mean_pred_prob = mean(Prediction, na.rm=TRUE),
se_pred_prob = sd(Prediction, na.rm=TRUE) / sqrt(n()),
n = n()
) %>%
arrange(desc(mean_pred_prob)) %>%
mutate(Rank = row_number())
k_zone_height_max <- 3.67
k_zone_height_min <- 1.52
sides <- c(-0.83, 0.83)
zone_width <- diff(sides)
zone_height <- k_zone_height_max - k_zone_height_min
x_breaks <- seq(sides[1] - zone_width/3,
sides[2] + zone_width/3,
length.out = 6)
y_breaks <- seq(k_zone_height_min - zone_height/3,
k_zone_height_max + zone_height/3,
length.out = 6)
# Expected value of each pitch over our zone
grid <- expand.grid(
platelocside = seq(min(x_breaks), max(x_breaks), length.out = 300),
platelocheight = seq(min(y_breaks), max(y_breaks), length.out = 300)
)
cluster_means_LL <- LHP_LHH_by_cluster %>%
group_by(Cluster) %>%
summarise(across(c(spinrate, relspeed, inducedvertbreak,
horzbreak, initposx, initposz), \(x) mean(x, na.rm = TRUE)),
n = n())
# Integrate expected probabilities
zone_rank_LL <- cluster_means_LL %>%
mutate(
expected_prob = map_dbl(Cluster, \(clust) {
vals <- filter(cluster_means_LL, Cluster == clust)
grid_tmp <- grid
for (col in names(vals)[names(vals) != c("Cluster", "n")]) {
grid_tmp[[col]] <- vals[[col]]
}
grid_tmp$Cluster <- clust
preds <- predict(GAM_LL, newdata = grid_tmp, type = "response")
mean(preds, na.rm = TRUE)
})
) %>%
mutate(
relative_to_mean = expected_prob / mean(expected_prob, na.rm = TRUE)
) %>%
select(Cluster, n, expected_prob, relative_to_mean) %>%
arrange(desc(expected_prob))
zone_rank_LL
## # A tibble: 5 × 4
## Cluster n expected_prob relative_to_mean
## <dbl> <int> <dbl> <dbl>
## 1 4 7234 0.225 1.32
## 2 3 984 0.199 1.17
## 3 2 6687 0.157 0.925
## 4 1 7184 0.144 0.848
## 5 0 4282 0.124 0.732
cluster_means_LR <- LHP_RHH_by_cluster %>%
group_by(Cluster) %>%
summarise(across(c(spinrate, relspeed, inducedvertbreak,
horzbreak, initposx, initposz), \(x) mean(x, na.rm = TRUE)),
n = n())
# Integrate expected probabilities
zone_rank_LR <- cluster_means_LR %>%
mutate(
expected_prob = map_dbl(Cluster, \(clust) {
vals <- filter(cluster_means_LR, Cluster == clust)
grid_tmp <- grid
for (col in names(vals)[names(vals) != c("Cluster", "n")]) {
grid_tmp[[col]] <- vals[[col]]
}
grid_tmp$Cluster <- clust
preds <- predict(GAM_LR, newdata = grid_tmp, type = "response")
mean(preds, na.rm = TRUE)
})
) %>%
mutate(
relative_to_mean = expected_prob / mean(expected_prob, na.rm = TRUE)
) %>%
select(Cluster, n, expected_prob, relative_to_mean) %>%
arrange(desc(expected_prob))
zone_rank_LR
## # A tibble: 5 × 4
## Cluster n expected_prob relative_to_mean
## <dbl> <int> <dbl> <dbl>
## 1 4 15923 0.165 1.19
## 2 3 13072 0.146 1.06
## 3 2 12011 0.131 0.950
## 4 1 23124 0.129 0.936
## 5 0 8841 0.118 0.860
cluster_means_RL <- RHP_LHH_by_cluster %>%
group_by(Cluster) %>%
summarise(across(c(spinrate, relspeed, inducedvertbreak,
horzbreak, initposx, initposz), \(x) mean(x, na.rm = TRUE)),
n = n())
# Integrate expected probabilities
zone_rank_RL <- cluster_means_RL %>%
mutate(
expected_prob = map_dbl(Cluster, \(clust) {
vals <- filter(cluster_means_RL, Cluster == clust)
grid_tmp <- grid
for (col in names(vals)[names(vals) != c("Cluster", "n")]) {
grid_tmp[[col]] <- vals[[col]]
}
grid_tmp$Cluster <- clust
preds <- predict(GAM_RL, newdata = grid_tmp, type = "response")
mean(preds, na.rm = TRUE)
})
) %>%
mutate(
relative_to_mean = expected_prob / mean(expected_prob, na.rm = TRUE)
) %>%
select(Cluster, n, expected_prob, relative_to_mean) %>%
arrange(desc(expected_prob))
zone_rank_RL
## # A tibble: 5 × 4
## Cluster n expected_prob relative_to_mean
## <dbl> <int> <dbl> <dbl>
## 1 3 24419 0.153 1.14
## 2 2 19571 0.135 1.01
## 3 1 42944 0.132 0.987
## 4 0 20988 0.126 0.945
## 5 4 15520 0.122 0.911
cluster_means_RR <- RHP_RHH_by_cluster %>%
group_by(Cluster) %>%
summarise(across(c(spinrate, relspeed, inducedvertbreak,
horzbreak, initposx, initposz), \(x) mean(x, na.rm = TRUE)),
n = n())
# Integrate expected probabilities
zone_rank_RR <- cluster_means_RR %>%
mutate(
expected_prob = map_dbl(Cluster, \(clust) {
vals <- filter(cluster_means_RR, Cluster == clust)
grid_tmp <- grid
for (col in names(vals)[names(vals) != c("Cluster", "n")]) {
grid_tmp[[col]] <- vals[[col]]
}
grid_tmp$Cluster <- clust
preds <- predict(GAM_RR, newdata = grid_tmp, type = "response")
mean(preds, na.rm = TRUE)
})
) %>%
mutate(
relative_to_mean = expected_prob / mean(expected_prob, na.rm = TRUE)
) %>%
select(Cluster, n, expected_prob, relative_to_mean) %>%
arrange(desc(expected_prob))
zone_rank_RR
## # A tibble: 5 × 4
## Cluster n expected_prob relative_to_mean
## <dbl> <int> <dbl> <dbl>
## 1 3 34464 0.214 1.33
## 2 2 8113 0.163 1.01
## 3 0 38163 0.150 0.932
## 4 4 22204 0.140 0.871
## 5 1 41876 0.138 0.856
Pitch Ranking Great Table
library(gt)
pitch_types_left_left <- c("Sinker/2-Seam Fastball",
"Splitter/Changeup",
"Slider/Cutter",
"4-Seam Fastball",
"Curveball/Vertical Dropper")
pitch_types_ranked_LL <- zone_rank_LL %>%
mutate(
Pitch_Type = pitch_types_left_left,
Rank = row_number()
)
# Create the gt table
gt_LL <- pitch_types_ranked_LL %>%
gt() %>%
cols_move_to_start(columns = c(Rank, Pitch_Type, Cluster,
expected_prob, relative_to_mean)) %>%
fmt_number(columns = c(expected_prob, relative_to_mean), decimals = 3) %>%
cols_label(
Cluster = "Pitch Cluster",
Pitch_Type = "Pitch Profile",
expected_prob = "Expected Probability",
n = "Sample Size",
Rank = "Rank",
relative_to_mean = "Relative to Average"
) %>%
tab_header(
title = "RExpected Optimal GIDP Contact Probabilities by Pitch Profil",
subtitle = "LHP vs. LHH Matchup"
) %>%
fmt_missing(everything(), missing_text = "—") %>%
data_color(
columns = c(Rank),
colors = scales::col_numeric(
palette = c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac"),
domain = range(
pitch_types_ranked_LL$Rank
)
)
) %>%
data_color(
columns = c(relative_to_mean),
colors = scales::col_numeric(
palette = rev(c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac")),
domain = range(
pitch_types_ranked_LL$relative_to_mean
)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(columns = c(Rank, expected_prob, relative_to_mean))
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(18))
),
locations = cells_title(groups = "title")
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(14))
),
locations = cells_title(groups = "subtitle")
) %>%
tab_options(
table.font.size = 13,
data_row.padding = px(5)
)
## Warning: Since gt v0.6.0 `fmt_missing()` is deprecated and will soon be removed.
## ℹ Use `sub_missing()` instead.
## This warning is displayed once every 8 hours.
## Warning: Since gt v0.9.0, the `colors` argument has been deprecated.
## • Please use the `fn` argument instead.
## This warning is displayed once every 8 hours.
gt_LL
| RExpected Optimal GIDP Contact Probabilities by Pitch Profil |
| LHP vs. LHH Matchup |
| Rank |
Pitch Profile |
Pitch Cluster |
Expected Probability |
Relative to Average |
Sample Size |
| 1 |
Sinker/2-Seam Fastball |
4 |
0.225 |
1.322 |
7234 |
| 2 |
Splitter/Changeup |
3 |
0.199 |
1.173 |
984 |
| 3 |
Slider/Cutter |
2 |
0.157 |
0.925 |
6687 |
| 4 |
4-Seam Fastball |
1 |
0.144 |
0.848 |
7184 |
| 5 |
Curveball/Vertical Dropper |
0 |
0.124 |
0.732 |
4282 |
pitch_types_left_right <- c("Sinker/2-Seam Fastball",
"Splitter/Changeup",
"Slider/Cutter",
"Curveball/Vertical Dropper",
"4-Seam Fastball")
pitch_types_ranked_LR <- zone_rank_LR %>%
mutate(
Pitch_Type = pitch_types_left_right,
Rank = row_number()
)
# Create the gt table
gt_LR <- pitch_types_ranked_LR %>%
gt() %>%
cols_move_to_start(columns = c(Rank, Pitch_Type, Cluster,
expected_prob, relative_to_mean)) %>%
fmt_number(columns = c(expected_prob, relative_to_mean), decimals = 3) %>%
cols_label(
Cluster = "Pitch Cluster",
Pitch_Type = "Pitch Profile",
expected_prob = "Expected Probability",
n = "Sample Size",
Rank = "Rank",
relative_to_mean = "Relative to Average"
) %>%
tab_header(
title = "Expected Optimal GIDP Contact Probabilities by Pitch Profil",
subtitle = "LHP vs. RHH Matchup"
) %>%
fmt_missing(everything(), missing_text = "—") %>%
data_color(
columns = c(Rank),
colors = scales::col_numeric(
palette = c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac"),
domain = range(
pitch_types_ranked_LR$Rank
)
)
) %>%
data_color(
columns = c(relative_to_mean),
colors = scales::col_numeric(
palette = rev(c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac")),
domain = range(
pitch_types_ranked_LR$relative_to_mean
)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(columns = c(Rank, expected_prob, relative_to_mean))
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(18))
),
locations = cells_title(groups = "title")
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(14))
),
locations = cells_title(groups = "subtitle")
) %>%
tab_options(
table.font.size = 13,
data_row.padding = px(5)
)
gt_LR
| Expected Optimal GIDP Contact Probabilities by Pitch Profil |
| LHP vs. RHH Matchup |
| Rank |
Pitch Profile |
Pitch Cluster |
Expected Probability |
Relative to Average |
Sample Size |
| 1 |
Sinker/2-Seam Fastball |
4 |
0.165 |
1.194 |
15923 |
| 2 |
Splitter/Changeup |
3 |
0.146 |
1.061 |
13072 |
| 3 |
Slider/Cutter |
2 |
0.131 |
0.950 |
12011 |
| 4 |
Curveball/Vertical Dropper |
1 |
0.129 |
0.936 |
23124 |
| 5 |
4-Seam Fastball |
0 |
0.118 |
0.860 |
8841 |
pitch_types_right_left <- c("Sinker/2-Seam Fastball",
"Splitter/Changeup",
"4-Seam Fastball",
"Slider/Cutter",
"Curveball/Vertical Dropper")
pitch_types_ranked_RL <- zone_rank_RL %>%
mutate(
Pitch_Type = pitch_types_right_left,
Rank = row_number()
)
# Create the gt table
gt_RL <- pitch_types_ranked_RL %>%
gt() %>%
cols_move_to_start(columns = c(Rank, Pitch_Type, Cluster,
expected_prob, relative_to_mean)) %>%
fmt_number(columns = c(expected_prob, relative_to_mean), decimals = 3) %>%
cols_label(
Cluster = "Pitch Cluster",
Pitch_Type = "Pitch Profile",
expected_prob = "Expected Probability",
n = "Sample Size",
Rank = "Rank",
relative_to_mean = "Relative to Average"
) %>%
tab_header(
title = "Expected Optimal GIDP Contact Probabilities by Pitch Profil",
subtitle = "RHP vs. LHH Matchup"
) %>%
fmt_missing(everything(), missing_text = "—") %>%
data_color(
columns = c(Rank),
colors = scales::col_numeric(
palette = c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac"),
domain = range(
pitch_types_ranked_RL$Rank
)
)
) %>%
data_color(
columns = c(relative_to_mean),
colors = scales::col_numeric(
palette = rev(c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac")),
domain = range(
pitch_types_ranked_RL$relative_to_mean
)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(columns = c(Rank, expected_prob, relative_to_mean))
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(18))
),
locations = cells_title(groups = "title")
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(14))
),
locations = cells_title(groups = "subtitle")
) %>%
tab_options(
table.font.size = 13,
data_row.padding = px(5)
)
gt_RL
| Expected Optimal GIDP Contact Probabilities by Pitch Profil |
| RHP vs. LHH Matchup |
| Rank |
Pitch Profile |
Pitch Cluster |
Expected Probability |
Relative to Average |
Sample Size |
| 1 |
Sinker/2-Seam Fastball |
3 |
0.153 |
1.144 |
24419 |
| 2 |
Splitter/Changeup |
2 |
0.135 |
1.013 |
19571 |
| 3 |
4-Seam Fastball |
1 |
0.132 |
0.987 |
42944 |
| 4 |
Slider/Cutter |
0 |
0.126 |
0.945 |
20988 |
| 5 |
Curveball/Vertical Dropper |
4 |
0.122 |
0.911 |
15520 |
pitch_types_right_right <- c("Sinker/2-Seam Fastball",
"Splitter/Changeup",
"Slider/Cutter",
"Curveball/Vertical Dropper",
"4-Seam Fastball")
pitch_types_ranked_RR <- zone_rank_RR %>%
mutate(
Pitch_Type = pitch_types_right_right,
Rank = row_number()
)
# Create the gt table
gt_RR <- pitch_types_ranked_RR %>%
gt() %>%
cols_move_to_start(columns = c(Rank, Pitch_Type, Cluster,
expected_prob, relative_to_mean)) %>%
fmt_number(columns = c(expected_prob, relative_to_mean), decimals = 3) %>%
cols_label(
Cluster = "Pitch Cluster",
Pitch_Type = "Pitch Profile",
expected_prob = "Expected Probability",
n = "Sample Size",
Rank = "Rank",
relative_to_mean = "Relative to Average"
) %>%
tab_header(
title = "Expected Optimal GIDP Contact Probabilities by Pitch Profile",
subtitle = "RHP vs. RHH Matchup"
) %>%
fmt_missing(everything(), missing_text = "—") %>%
data_color(
columns = c(Rank),
colors = scales::col_numeric(
palette = c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac"),
domain = range(
pitch_types_ranked_RR$Rank
)
)
) %>%
data_color(
columns = c(relative_to_mean),
colors = scales::col_numeric(
palette = rev(c("#b2182b", "#ef8a62", "#f7f7f7", "#67a9cf", "#2166ac")),
domain = range(
pitch_types_ranked_RR$relative_to_mean
)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(columns = c(Rank, expected_prob, relative_to_mean))
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(18))
),
locations = cells_title(groups = "title")
) %>%
tab_style(
style = list(
cell_text(weight = "bold", size = px(14))
),
locations = cells_title(groups = "subtitle")
) %>%
tab_options(
table.font.size = 13,
data_row.padding = px(5)
)
gt_RR
| Expected Optimal GIDP Contact Probabilities by Pitch Profile |
| RHP vs. RHH Matchup |
| Rank |
Pitch Profile |
Pitch Cluster |
Expected Probability |
Relative to Average |
Sample Size |
| 1 |
Sinker/2-Seam Fastball |
3 |
0.214 |
1.330 |
34464 |
| 2 |
Splitter/Changeup |
2 |
0.163 |
1.011 |
8113 |
| 3 |
Slider/Cutter |
0 |
0.150 |
0.932 |
38163 |
| 4 |
Curveball/Vertical Dropper |
4 |
0.140 |
0.871 |
22204 |
| 5 |
4-Seam Fastball |
1 |
0.138 |
0.856 |
41876 |
library(gratia)
smooth_eff_RR <- gratia::smooth_estimates(GAM_RR)
feature_rank_RR <- smooth_eff_RR %>%
group_by(.smooth) %>%
summarise(
effect_range = max(.estimate) - min(.estimate),
effect_sd = sd(.estimate)
) %>%
arrange(desc(effect_range))
feature_rank_RR
## # A tibble: 10 × 3
## .smooth effect_range effect_sd
## <chr> <dbl> <dbl>
## 1 ti(platelocside,platelocheight) 6.55 0.947
## 2 s(platelocside) 3.09 0.838
## 3 s(relspeed) 2.17 0.697
## 4 s(inducedvertbreak) 2.11 0.584
## 5 s(platelocheight) 1.91 0.596
## 6 ti(initposx,initposz) 1.69 0.198
## 7 s(horzbreak) 0.940 0.277
## 8 s(initposz) 0.780 0.229
## 9 s(spinrate) 0.301 0.0881
## 10 s(initposx) 0.0644 0.0192
smooth_eff_RL <- gratia::smooth_estimates(GAM_RL)
feature_rank_RL <- smooth_eff_RL %>%
group_by(.smooth) %>%
summarise(
effect_range = max(.estimate) - min(.estimate),
effect_sd = sd(.estimate)
) %>%
arrange(desc(effect_range))
feature_rank_RL
## # A tibble: 10 × 3
## .smooth effect_range effect_sd
## <chr> <dbl> <dbl>
## 1 s(inducedvertbreak) 2.65 0.710
## 2 s(platelocside) 2.46 0.730
## 3 s(relspeed) 1.98 0.662
## 4 ti(platelocside,platelocheight) 1.73 0.251
## 5 s(platelocheight) 1.37 0.425
## 6 s(horzbreak) 0.780 0.204
## 7 s(initposx) 0.678 0.203
## 8 ti(initposx,initposz) 0.451 0.0765
## 9 s(spinrate) 0.282 0.0914
## 10 s(initposz) 0.218 0.0599
smooth_eff_LR <- gratia::smooth_estimates(GAM_LR)
feature_rank_LR <- smooth_eff_LR %>%
group_by(.smooth) %>%
summarise(
effect_range = max(.estimate) - min(.estimate),
effect_sd = sd(.estimate)
) %>%
arrange(desc(effect_range))
feature_rank_LR
## # A tibble: 10 × 3
## .smooth effect_range effect_sd
## <chr> <dbl> <dbl>
## 1 ti(platelocside,platelocheight) 26.7 2.80
## 2 s(platelocside) 4.84 1.51
## 3 s(inducedvertbreak) 3.30 0.884
## 4 s(horzbreak) 3.25 0.848
## 5 s(platelocheight) 1.75 0.513
## 6 s(relspeed) 1.57 0.539
## 7 ti(initposx,initposz) 0.672 0.0803
## 8 s(spinrate) 0.639 0.214
## 9 s(initposz) 0.255 0.0871
## 10 s(initposx) 0.248 0.0727
smooth_eff_LL <- gratia::smooth_estimates(GAM_LL)
feature_rank_LL <- smooth_eff_LL %>%
group_by(.smooth) %>%
summarise(
effect_range = max(.estimate) - min(.estimate),
effect_sd = sd(.estimate)
) %>%
arrange(desc(effect_range))
feature_rank_LL
## # A tibble: 10 × 3
## .smooth effect_range effect_sd
## <chr> <dbl> <dbl>
## 1 ti(platelocside,platelocheight) 15.0 1.84
## 2 s(inducedvertbreak) 8.46 2.72
## 3 s(platelocheight) 2.15 0.629
## 4 s(platelocside) 1.75 0.483
## 5 s(relspeed) 1.67 0.518
## 6 s(initposz) 0.887 0.260
## 7 s(horzbreak) 0.829 0.289
## 8 s(spinrate) 0.309 0.0905
## 9 s(initposx) 0.165 0.0482
## 10 ti(initposx,initposz) 0.159 0.0235
plot(GAM_RR,
scale = 0, # use same y-scale across plots (better for comparison)
ylim = c(-3, 3), # fix y-axis limits
rug = TRUE, # show tick marks for data density
col = "navy",
ylab = "Partial Effect on GIDP Probability"
)










plot(GAM_RL)










plot(GAM_LR)










plot(GAM_LL)









